* Firm organization with multiple establishments
* Section II.C, Tables I, II, Table A.16

clear all 
set matsize 2000
set more off

capture log close
log using log/01_desc-cs_descriptives.log, replace

use untid betnr jahr persnr beruf beruf2010 tentgelt w93_3_gen w08_5 ao_kreis ao_bula ///
	hauptbet d_educ lnw limit cens if jahr == 2012 using data/Panel.dta, clear
desc

order limit cens lnw, after(tentgelt)
	
********************************************************************************
***	Sample restriction *********************************************************
********************************************************************************

*	Minimum firm size
bys untid jahr: egen empl_unt = count(persnr)
qui keep if empl_unt >= 10

*	Cleaning: social security limit
qui replace tentgelt = limit if cens == 1
drop limit cens
qui replace lnw = ln(tentgelt) 		
count if lnw == .

********************************************************************************
***	Establishment characteristics **********************************************
********************************************************************************

bys betnr jahr: egen empl_bet = count(persnr)

merge m:1 betnr jahr using data/BHPinclUntID.dta
qui keep if _merge == 3
drop _merge

drop count_est_siab chge_est entry_unt jahr_eins_final jahr_eins_est az_vz eintritt ///
	betnr_vor untid_vor verysmall matchtype d_multest_4 ever_multest_4 always_multest_4 vorg_died ///
	mode austritt betnr_nach nachf_willbenew untid_nach vorgaenger nachfolger Sales ValueAdded ///
	erst_jahr_eintritt jahr_lzt_est first_year_ME entry_dyn exit_dyn exit_unt add_est mode_unt sector sector2 hq_sector

order hq_kreis, after(ao_bula)
	
********************************************************************************
***	Layer classification based on KldB2010, managerial organization ************
********************************************************************************

merge m:1 beruf2010 using data/KldB2010_LayerCMRHFriedrich.dta
qui drop if _merge == 2 
drop _merge

// manual adjustments based on occupation name
qui replace layer_neuAG = 3 if beruf2010 == 73294
qui replace layer_neuAG = 2 if beruf2010 == 71224
qui replace layer_neuAG = 0 if beruf2010 == 1402
qui replace layer_neuAG = 0 if beruf2010 == 1302
qui replace layer_neuAG = 0 if beruf2010 == 1203
qui replace layer_neuAG = 1 if beruf2010 == 1104

rename layer_neuAG layer

merge m:1 beruf using data/KldB1988_Blossfeld.dta
qui drop if _merge == 2
drop _merge

bys untid jahr: egen lowest = min(layer)
bys untid jahr: egen second_lowest = min(layer) if layer > lowest
bys untid jahr: egen third_lowest = min(layer) if layer > second_lowest
bys untid jahr: egen highest = min(layer) if layer > third_lowest

gen layer_rank = .
replace layer_rank = 0 if layer == lowest
replace layer_rank = 1 if layer == second_lowest & layer_rank == .
replace layer_rank = 2 if layer == third_lowest & layer_rank == .
replace layer_rank = 3 if layer == highest & layer_rank == .

//	Number of managerial layers
egen flg_untjhrlay = tag(untid jahr layer_rank)
qui replace flg_untjhrlay = 0 if layer_rank == 0
bys untid jahr: egen count_mgmt_unt = total(flg_untjhrlay)
drop flg_untjhrlay

egen flg_estjhrlay = tag(betnr jahr layer_rank)
qui replace flg_estjhrlay = 0 if layer_rank == 0
bys betnr jahr: egen count_mgmt_bet = total(flg_estjhrlay)
drop flg_estjhrlay

//	Plausibility check
egen flg_untjhrlay = tag(untid jahr layer)
bys untid jahr: egen count_mgmt_unt2 = total(flg_untjhrlay)
qui replace count_mgmt_unt2 = count_mgmt_unt2 - 1
count if count_mgmt_unt != count_mgmt_unt2
drop flg_untjhrlay count_mgmt_unt2

//	Management share (employment)
gen mgmt_lay = (layer_rank > 0)
bys betnr jahr: egen nbr_mgmt_bet = total(mgmt_lay)
bys untid jahr: egen nbr_mgmt_unt = total(mgmt_lay)
gen shr_mgmt_bet = (nbr_mgmt_bet / empl_bet) * 100
gen shr_mgmt_unt = (nbr_mgmt_unt / empl_unt) * 100
gen prdt_wkrs_unt = empl_unt - nbr_mgmt_unt
gen prdt_wkrs_bet = empl_bet - nbr_mgmt_bet
drop nbr_mgmt*

//	Management share (wage sum)
bys untid jahr: egen wage_sum_unt = total(tentgelt)
bys untid jahr: egen wage_sum_mgmt = total(mgmt_lay * tentgelt)
gen shr_mgmt_w_unt = (wage_sum_mgmt / wage_sum_unt) * 100
bys betnr jahr: egen wage_sum_bet = total(tentgelt)
bys betnr jahr: egen wage_sum_mgmt_bet = total(mgmt_lay * tentgelt)
gen shr_mgmt_w_bet = (wage_sum_mgmt_bet / wage_sum_bet) * 100

//	Management share (Blossfeld)
gen d_mgr = (blossfeld == 12)
bys betnr jahr: egen shr_mgmt_bet_bloss = mean(d_mgr * 100)
bys untid jahr: egen shr_mgmt_unt_bloss = mean(d_mgr * 100)
bys untid jahr: egen wage_unt_bloss = total(d_mgr * tentgelt)
gen shr_bloss_w_unt = (wage_unt_bloss / wage_sum_unt) * 100
bys betnr jahr: egen wage_bet_bloss = total(d_mgr * tentgelt)
gen shr_bloss_w_bet = (wage_bet_bloss / wage_sum_bet) * 100
drop wage_sum_*
drop d_mgr

********************************************************************************
//	Consecutive vs. non-consecutive organizational structures: firm

sort untid jahr
qui {
	forvalues l = 0/3 {
		egen e_`l' = anymatch(layer), values(`l')
		by untid jahr: egen count_layer`l' = sum(e_`l')
		by untid jahr: gen l_`l' = (count_layer`l' > 0)
		label variable l_`l' "Dummy firm has layer `l'"
		capture drop e_`l' count_layer`l'
	}
}

gen comb_layer_unt = l_3 * 1000 + l_2 * 100 + l_1 * 10 + l_0 
label variable comb_layer_unt "Pattern of layers, from high to low, firm level"
label define structure 1 "0" 10 "1" 100 "2" 1000 "3" 11 "0 + 1" 101 "0 + 2" 1001 "0 + 3" ///
	110 "1 + 2" 1010 "1 + 3" 1100 "2 + 3" 111 "0 + 1 + 2" 1101 "0 + 2 + 3" 1011 "0 + 1 + 3" ///
	1110 "1 + 2 + 3" 1111 "All layers"
label values comb_layer_unt structure
capture drop l_*

cap drop flg_untjhr
cap drop flg_estjhr
egen flg_untjhr = tag(untid jahr)
egen flg_estjhr = tag(betnr jahr)
bys untid jahr: egen count_est = total(flg_estjhr)
gen d_multest = (count_est > 1)

forvalues i = 0/3 {
	di as text "# layers: " `i'
	tab comb_layer_unt if flg_untjhr == 1 & count_mgmt_unt == `i', m
	tab comb_layer_unt if flg_untjhr == 1 & count_mgmt_unt == `i' & d_multest == 0, m
	tab comb_layer_unt if flg_untjhr == 1 & count_mgmt_unt == `i' & d_multest == 1, m
}

//	Consecutive vs. non-consecutive organizational structures: establishment
capture drop e_*
capture drop count_layer*

sort betnr jahr
qui {
	forvalues l = 0/3 {
		egen e_`l' = anymatch(layer), values(`l')
		by betnr jahr: egen count_layer`l' = sum(e_`l')
		by betnr jahr: gen l_`l' = (count_layer`l' > 0)
		label variable l_`l' "Dummy estab. has layer `l'"
		capture drop e_`l' count_layer`l'
	}
}

capture drop comb_layer
gen comb_layer = l_3 * 1000 + l_2 * 100 + l_1 * 10 + l_0 
label variable comb_layer "Pattern of layers, from high to low, estab. level"
label values comb_layer structure

forvalues i = 0/3 {
	di as text "# layers: " `i'
	tab comb_layer if flg_estjhr == 1 & count_mgmt_bet == `i', m
	tab comb_layer if flg_estjhr == 1 & count_mgmt_bet == `i' & d_multest == 0, m
	tab comb_layer if flg_estjhr == 1 & count_mgmt_bet == `i' & d_multest == 1, m
}

********************************************************************************

********************************************************************************
***	Establishment characteristics (ctd.) ***************************************
********************************************************************************

qui keep if flg_estjhr == 1
drop flg_estjhr

// Drop person-specific variables
drop persnr beruf beruf2010 tentgelt lnw layer layer_neu ///
		lowest second_lowest third_lowest highest layer_rank mgmt_lay blossfeld 

capture drop w08_3
qui gen w08_3 = int(w08_5 / 100)
order w08_3, after(w08_5)

egen flg_wz = tag(untid jahr w08_3)
bys untid jahr: egen count_wz = total(flg_wz)
drop flg_wz

tab2 d_multest hauptbet, m
replace hauptbet = 0 if d_multest == 0

qui gen aux_hq_wz = w08_3 if hauptbet == 1
bys untid jahr: egen hq_wz = min(aux_hq_wz)
count if hq_wz != . & d_multest == 0
drop aux_hq_wz

replace hq_wz = w08_3 if d_multest == 0
count if hq_wz == .

//	Sector groups
capture drop hq_sector
qui gen hq_sector = .
qui replace hq_sector =  1 if hq_wz <   50	// A Agriculture
qui replace hq_sector =  2 if hq_wz >=  50 & hq_wz < 100 // B Mining and quarrying
qui replace hq_sector =  3 if hq_wz >= 100 & hq_wz < 350 // C Manufacturing 
qui replace hq_sector =  4 if hq_wz >= 350 & hq_wz < 360 // D Electricity
qui replace hq_sector =  5 if hq_wz >= 360 & hq_wz < 410 // E Water/Sewage
qui replace hq_sector =  6 if hq_wz >= 410 & hq_wz < 450 // F Construction
qui replace hq_sector =  7 if hq_wz >= 450 & hq_wz < 490 // G Retail/Wholesale 
qui replace hq_sector =  8 if hq_wz >= 490 & hq_wz < 550 // H Traffic
qui replace hq_sector =  9 if hq_wz >= 550 & hq_wz < 580 // I Hotels
qui replace hq_sector = 10 if hq_wz >= 580 & hq_wz < 640 // J Information and Communication
qui replace hq_sector = 11 if hq_wz >= 640 & hq_wz < 680 // K Finance
qui replace hq_sector = 12 if hq_wz >= 680 & hq_wz < 690 // L Real Estate 
qui replace hq_sector = 13 if hq_wz >= 690 & hq_wz < 770 // M Services
qui replace hq_sector = 14 if hq_wz >= 770 & hq_wz < 840 // N Other services
qui replace hq_sector = 15 if hq_wz >= 840 & hq_wz < 850 // O Public administration
qui replace hq_sector = 16 if hq_wz >= 850 & hq_wz < 860 // P Education 
qui replace hq_sector = 17 if hq_wz >= 860 & hq_wz < 900 // Q Health
qui replace hq_sector = 18 if hq_wz >= 900 & hq_wz < 940 // R Art and entertainment
qui replace hq_sector = 19 if hq_wz >= 940 & hq_wz < 970 // S Other services
qui replace hq_sector = 20 if hq_wz >= 970 & hq_wz < 990 // T Private Households
qui replace hq_sector = 21 if hq_wz >= 990 & hq_wz != . // U Extraterritorial Organizations

drop w08_5 ao_bula legal_cat

********************************************************************************
***	TABLE II: Descriptive statistics, ME firms, lower panel

foreach v in empl_bet count_mgmt_bet shr_mgmt_w_bet shr_bloss_w_bet {
	display "-------- Variable: `v' --------"
	tabstat `v' if d_multest == 1, by(hauptbet) s(n mean sd p5 p10 q p90 p95)
	ttest `v' if d_multest == 1, by(hauptbet)
}
********************************************************************************

********************************************************************************
***	Firm characteristics *******************************************************
********************************************************************************

merge m:1 untid jahr using data/Amadeus_Aug2018_untid.dta
qui drop if _merge == 2
drop _merge
drop NACE Empl OperRev ValueAdded ln_sales ln_operrev

bys untid jahr: egen max_dist = max(distance_all)

// Create information on area covered
preserve
qui keep if count_est > 2
keep untid jahr betnr ao_kreis
duplicates drop

merge m:1 ao_kreis using data/Middle_coordinates_max-pop.dta
drop if _merge == 2
drop _merge

gen double aux = untid * 10000
gen double untjhr = aux + jahr

fieldarea lon lat, id(untjhr) generate(area) unit(sqkm)
tabstat area, s(n mean sd min p10 q p90 max)
// Germany has 360,000 sqkm

save data/MEfirm_area_cs.dta, replace
restore

gen double aux = untid * 10000
gen double untjhr = aux + jahr
drop aux

cap drop flg_untjhr
egen flg_untjhr = tag(untid jahr)

merge m:1 untjhr using data/MEfirm_area_cs.dta
drop if _merge == 2
tab count_est if _merge == 1 & flg_untjhr == 1, m sort
drop _merge
drop untjhr

// Legal form
qui gen legal_form = .
qui replace legal_form = 1 if (Rechtsform_neu == 8 | Rechtsform_neu == 16 | Rechtsform_neu == 17 | Rechtsform_neu == 19 | Rechtsform_neu == 29)
qui replace legal_form = 2 if (Rechtsform_neu == 3 | Rechtsform_neu == 11 | Rechtsform_neu == 12 | Rechtsform_neu == 14 | Rechtsform_neu == 15)
qui replace legal_form = 3 if (Rechtsform_neu == 13)
qui replace legal_form = 4 if (Rechtsform_neu == 5 | Rechtsform_neu == 22 | Rechtsform_neu == 23 | Rechtsform_neu == 24)
label define legal 1 "Einzelunternehmen" 2 "GmbH & coKG" 3 "GmbH" 4 "AG"
label values legal_form legal

********************************************************************************
*** TABLE I: Descriptive statistics, SE vs. ME firms

***	Upper panel
tab d_multest if flg_untjhr == 1, m // firm
tab d_multest if flg_untjhr == 1 & Sales != ., m
tab d_multest, m // establishments
tabstat empl_unt if flg_untjhr == 1, by(d_multest) c(s) s(N sum) format(%15.0g) // employees

***	Lower panel
foreach v in empl_unt Sales count_mgmt_unt shr_mgmt_w_unt shr_bloss_w_unt {
	display "-------- Variable: `v' --------"
	tabstat `v' if flg_untjhr == 1, by(d_multest) s(n mean sd p5 p10 q p90 p95)
	ttest `v' if flg_untjhr == 1, by(d_multest)
}

********************************************************************************
*** TABLE II: Descriptive statistics, ME firms, upper panel

tabstat count_est max_dist area if flg_untjhr == 1 & d_multest == 1, c(s) s(n mean sd p5 p10 q p90 p95)

********************************************************************************
*** TABLE A.16: Share of ME firms in all firms, establishments and employment by sector

forvalues i = 1/21 {
	display ""
	display "********************************************************************************"
	display "HQ sector: " `i'
	count if hq_sector == `i' 
	if r(N) > 0 {
		tab d_multest if flg_untjhr == 1 & hq_sector == `i', m // firm
		tab d_multest if hq_sector == `i', m // establishment
		tabstat empl_unt if flg_untjhr == 1 & hq_sector == `i', by(d_multest) c(s) s(N sum) format(%15.0g) // employees
	}
}
	
*************************************************************************************

log close
